import os
import numpy as np
import pandas as pd
# TensorFlow
import tensorflow as tf
from tensorflow.keras import layers, models
# cv
import cv2
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import cm
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
The dataset was created by my team during the NASA Space Apps Challenge in 2018; the goal was to use it to develop a model that can recognize images that contain fire.
Data was collected to train a model to distinguish between images that contain fire (fire images) and regular images (non-fire images), so the whole problem is binary classification. The data is divided into 2 folders: the fire-images folder contains 755 outdoor-fire images, some of which contain heavy smoke; the other is the non-fire-images folder, which contains 244 nature images (e.g.: forest, tree, grass, river, people, foggy forest, lake, animal, road, and waterfall). Hint: the data is skewed, meaning the 2 classes (folders) don't have an equal number of samples, so make sure that you have a validation set with an equal number of images per class (e.g.: 40 images for each of the fire and non-fire classes).
def Path_Tree(PATH, Extension):
    """Print a colourised tree of PATH's subfolders and collect matching files.

    Parameters
    ----------
    PATH : str
        Root directory to scan; only its immediate subdirectories are listed.
    Extension : str
        File-extension filter, e.g. '.png'.

    Returns
    -------
    dict
        Maps each subdirectory name to the list of file names in it that
        end with `Extension`.
    """
    Out = {}
    sep = ' ' * 3
    BACK = {'Black': Back.BLACK, 'Red': Back.RED, 'Green': Back.GREEN, 'Yellow': Back.YELLOW,
            'Blue': Back.BLUE, 'Magenta': Back.MAGENTA, 'Cyan': Back.CYAN}
    title = PATH.split('\\')[-1]
    print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) + 1) + Style.RESET_ALL)
    print(Back.BLACK + Fore.CYAN + Style.NORMAL + title + ':' + Style.RESET_ALL)
    print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) + 1) + Style.RESET_ALL)
    i = 0
    C = ['Red', 'Magenta']
    for entry in os.listdir(PATH):
        Sub = os.path.join(PATH, entry)
        # BUG FIX: the original tested os.path.isdir(PATH) — always True — so a
        # stray file inside PATH would crash os.listdir below; test the entry.
        if os.path.isdir(Sub):
            # Cycle through the background colours instead of indexing past the
            # end of C when PATH has more than two subdirectories.
            print('└──', BACK[C[i % len(C)]] + Fore.BLACK + Style.NORMAL + entry + ':' + Style.RESET_ALL)
            List = [x for x in os.listdir(Sub) if x.endswith(Extension)]
            Out[entry] = List
            # Guard: List[0] would raise IndexError on a folder with no matches.
            if List:
                print(2 * sep, Back.YELLOW + Fore.BLACK + Style.NORMAL +
                      '%i %s files' % (len(List), List[0].split('.')[-1].upper()) + Style.RESET_ALL)
                print(2 * sep, ', '.join(List[:5]) + ', ...')
            i += 1
    return Out
# Root folder of the image dataset; expected to contain one subfolder per class.
Path = 'fire_dataset'
# Mapping: class folder name -> list of .png file names found inside it.
Files_dict = Path_Tree(Path, '.png')
============= fire_dataset: ============= └── fire_images: 755 PNG files fire.1.png, fire.10.png, fire.100.png, fire.101.png, fire.102.png, ... └── non_fire_images: 244 PNG files non_fire.1.png, non_fire.10.png, non_fire.100.png, non_fire.101.png, non_fire.102.png, ...
# Human-readable label for each class folder, e.g. 'fire_images' -> 'Fire'
# and 'non_fire_images' -> 'Non-Fire'.
def _folder_to_label(name):
    return name.replace('_images', '').replace('_', ' ').title().replace('Non ', 'Non-')

Labels_dict = {folder: _folder_to_label(folder) for folder in Files_dict}

# One File/Label table per class folder, stacked into a single DataFrame.
# Per-class indices are deliberately kept (no ignore_index), matching the
# displayed table where the index restarts for the second class.
Dataset = pd.DataFrame()
for folder, files in Files_dict.items():
    part = pd.DataFrame({'File': [folder + '/' + f for f in files],
                         'Label': _folder_to_label(folder)})
    Dataset = pd.concat([Dataset, part])
display(Dataset)
| File | Label | |
|---|---|---|
| 0 | fire_images/fire.1.png | Fire |
| 1 | fire_images/fire.10.png | Fire |
| 2 | fire_images/fire.100.png | Fire |
| 3 | fire_images/fire.101.png | Fire |
| 4 | fire_images/fire.102.png | Fire |
| ... | ... | ... |
| 239 | non_fire_images/non_fire.95.png | Non-Fire |
| 240 | non_fire_images/non_fire.96.png | Non-Fire |
| 241 | non_fire_images/non_fire.97.png | Non-Fire |
| 242 | non_fire_images/non_fire.98.png | Non-Fire |
| 243 | non_fire_images/non_fire.99.png | Non-Fire |
999 rows × 2 columns
# Input-pipeline hyperparameters: all images are resized to 180x180 RGB.
batch_size = 128
Img_Height = 180
Img_Width = 180
# Infer class labels from the two subfolders of `Path`; the same seed on both
# calls keeps the training and validation subsets disjoint.
# NOTE(review): the 80/20 split is random over the skewed dataset, so the
# validation set is NOT balanced per class as the dataset description above
# recommends — confirm whether a stratified split is needed.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(directory= Path, validation_split=0.2, subset="training",
seed=123, image_size=(Img_Height, Img_Width),
batch_size=batch_size)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(directory= Path, validation_split=0.2, subset="validation",
seed=123, image_size=(Img_Height, Img_Width),
batch_size=batch_size)
Found 999 files belonging to 2 classes. Using 800 files for training. Found 999 files belonging to 2 classes. Using 199 files for validation.
# Preview a 4x4 grid of images from the first training batch; titles are
# coloured red for class 0 and sea-green for class 1.
fig, ax = plt.subplots(4, 4, figsize=(16, 16))
ax = ax.ravel()
class_names = train_ds.class_names
for images, labels in train_ds.take(1):
    for i, axis in enumerate(ax):
        axis.imshow(images[i].numpy().astype("uint8"))
        Color = 'Red' if labels[i].numpy() == 0 else 'SeaGreen'
        axis.set_title(Labels_dict[class_names[labels[i]]],
                       fontweight='bold', fontsize=14, color=Color)
        axis.axis("off")
        axis.set_aspect(1)
A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). At each iteration the algorithm uses the cross-entropy loss to measure the error, and then the gradient and the model update are computed. At the end of this iterative process we reach a better level of agreement between the true and predicted labels, since the error is lower than it was at the first step.
# CNN classifier: pixel rescaling to [0, 1], three Conv2D -> MaxPooling2D
# stages with growing filter counts, then a dense head. The final Dense
# layer emits one raw logit per class (the loss is applied with
# from_logits=True at compile time, so no softmax here).
model = models.Sequential()
model.add(layers.experimental.preprocessing.Rescaling(1./255, input_shape=(Img_Height, Img_Width, 3)))
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(len(Labels_dict)))
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True, expand_nested = True)
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= rescaling (Rescaling) (None, 180, 180, 3) 0 _________________________________________________________________ conv2d (Conv2D) (None, 180, 180, 16) 448 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 90, 90, 16) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 90, 90, 32) 4640 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 45, 45, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 45, 45, 64) 18496 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 22, 22, 64) 0 _________________________________________________________________ flatten (Flatten) (None, 30976) 0 _________________________________________________________________ dense (Dense) (None, 128) 3965056 _________________________________________________________________ dense_1 (Dense) (None, 2) 258 ================================================================= Total params: 3,988,898 Trainable params: 3,988,898 Non-trainable params: 0 _________________________________________________________________
Compiling and fitting the model
# Number of iterations (training epochs; each pass covers the whole train_ds)
IT = 21
# Integer labels + raw logits -> sparse categorical cross-entropy with from_logits=True
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
# Training the model; verbose=0 silences per-epoch logs (metrics are kept in `history`)
history = model.fit(train_ds, validation_data=val_ds, epochs=IT, verbose = 0)
def Search_List(Key, List):
    """Return the elements of List that contain the substring Key."""
    matches = []
    for item in List:
        if Key in item:
            matches.append(item)
    return matches
# Display names for the Keras history / metric keys used in the tables below.
Metrics_Names = {'loss': 'Loss', 'accuracy': 'Accuracy', 'mae': 'MAE', 'mse': 'MSE', 'recall': 'Recall'}

def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to their display names, sort the columns
    alphabetically, and prepend an 'Iteration' (epoch index) column."""
    renamed = df.rename(columns = Metrics_Names)
    renamed = renamed.reindex(sorted(renamed.columns), axis=1)
    renamed.insert(0, 'Iteration', np.arange(0, renamed.shape[0]), allow_duplicates=False)
    return renamed
# Split the Keras history into validation ('val_'-prefixed) and training
# metrics, and turn each into a per-epoch DataFrame via Table_modify.
# (Final column order is identical to the original: Table_modify sorts them.)
val_keys = [k for k in history.history if 'val_' in k]
train_keys = [k for k in history.history if k not in val_keys]
Validation_Table = pd.DataFrame({k: history.history[k] for k in val_keys})
Train_Table = pd.DataFrame({k: history.history[k] for k in train_keys})
# Strip the 'val_' prefix so both tables share the same column names.
Validation_Table.columns = [c.replace('val_', '') for c in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Final evaluation of the trained model on both subsets, rendered as a
# two-row (Train / Validation) by metric table.
# Train Set Score
score = model.evaluate(train_ds, batch_size = batch_size, verbose = 0)
score = pd.DataFrame(score, index = model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(val_ds, batch_size = batch_size, verbose = 0)
Temp = pd.DataFrame(Temp, index = model.metrics_names).T
Temp.index = ['Validation Set Score']
# BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement and produces the same result.
score = pd.concat([score, Temp])
score.rename(columns= Metrics_Names, inplace = True)
score = score.reindex(sorted(score.columns), axis=1)
# Styler.set_precision was deprecated (removed in pandas 2.0);
# Styler.format(precision=...) is the replacement.
display(score.style.format(precision=4))
| Accuracy | Loss | |
|---|---|---|
| Train Set Score | 1.0000 | 0.0064 |
| Validation Set Score | 0.9698 | 0.2427 |
def Plot_history(history, PD, Title = False, metrics_names = [x.title() for x in model.metrics_names]):
    """Plot per-iteration metric curves (left) next to a sampled metrics table (right).

    Parameters
    ----------
    history : pd.DataFrame
        Output of Table_modify: an 'Iteration' column plus one column per metric.
    PD : dict
        Plot options: 'yLim' (y-axis upper bound), 'Table_Rows' (number of rows
        sampled into the table, or None to show all rows), 'tablecolumnwidth'
        (relative table column widths), 'TableColors' ([line colour, fill/font
        colour]).
    Title : str or False
        Optional figure title; False suppresses it.
    metrics_names : list of str
        Metric columns to plot. NOTE(review): the default is evaluated once at
        definition time from the global `model`, so it reflects the metrics of
        the model compiled above — confirm `model` exists before this def runs.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left: one line trace per metric, colour-coded.
    Colors = ['OrangeRed', 'MidnightBlue', 'purple']
    for j in range(len(metrics_names)):
        fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history[metrics_names[j]].values,
                                 line=dict(color=Colors[j], width= 1.5), name = metrics_names[j]), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, PD['yLim']], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right: sub-sample the history to PD['Table_Rows'] evenly-spaced rows
    # (always keeping the last row) and render it as a plotly table.
    # Idiom fix: `is not None` instead of `not ... == None`.
    if PD['Table_Rows'] is not None:
        ind = np.linspace(0, history.shape[0], PD['Table_Rows'], endpoint = False).round(0).astype(int)
        ind = np.append(ind, history.index[-1])
        history = history[history.index.isin(ind)]
    T = history.copy()
    # Scientific notation keeps very small losses readable in the table.
    T[metrics_names] = T[metrics_names].applymap(lambda x: '%.4e' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    TableColors = PD['TableColors']
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color=TableColors[0],
                           fill_color=TableColors[0], align=['center','center'], font=dict(color=TableColors[1], size=12), height=25),
                           columnwidth = PD['tablecolumnwidth'], cells=dict(values=Temp, line_color=TableColors[0],
                           fill=dict(color=[TableColors[1], TableColors[1]]),
                           align=['center', 'center'], font_size=12,height=20)), 1, 2)
    if Title != False:
        fig.update_layout(plot_bgcolor= 'white',
                          title={'text': Title, 'x':0.46, 'y':0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
# Shared plot options: sample 25 rows into the side table, cap the y-axis at
# 1.2, and use a navy-on-white table colour scheme.
PD = dict(Table_Rows = 25, yLim = 1.2, tablecolumnwidth = [0.3, 0.4, 0.4], TableColors = ['Navy','White'])
Plot_history(Train_Table, Title = 'Train Set', PD = PD)
Plot_history(Validation_Table, Title = 'Validation Set', PD = PD)
Here we only went through a few iterations; however, the model would need to be trained for more iterations to get more accurate and reliable results.